Preparations

Load the necessary libraries

library(mgcv)      #for GAMs
library(broom)     #for tidy results
library(gratia)    #for GAM plots
library(DHARMa)    #for residual diagnostics
library(performance) #for residuals diagnostics
library(see)         #for plotting residuals
library(emmeans)   #for marginal means etc
library(MuMIn)     #for model selection and AICc
library(tidyverse) #for data wrangling

Scenario

This is an entirely fabricated example (how embarrising). So here is a picture of some Red Grouse Chicks to compensate..

Red grouse chicks

Format of data.gp.csv data file

x y
2 3
4 5
8 6
10 7
14 4
x - a continuous predictor
y - a continuous response

Read in the data

data_gam = read_csv('../public/data/data_gam.csv', trim_ws=TRUE)
## 
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   x = col_double(),
##   y = col_double()
## )
glimpse(data_gam)
## Rows: 5
## Columns: 2
## $ x <dbl> 2, 4, 8, 10, 14
## $ y <dbl> 3, 5, 6, 7, 4

Exploratory data analysis

Model formula: \[ y_i \sim{} \mathcal{N}(\mu_i, \sigma^2)\\ \mu_i =\beta_0 + f(x_i)\\ f(x_i) = \sum^k_{j=1}{b_j(x_i)\beta_j} \]

where \(\beta_0\) is the y-intercept, and \(f(x)\) indicates an additive smoothing function of \(x\).

Fit the model

Model validation

Partial plots

Model investigation / hypothesis testing

Further analyses

Summary figures

data_gam.list <- with(data_gam, list(x = modelr::seq_range(x, n = 100)))

newdata = emmeans(data_gam.gam, ~x, at = data_gam.list) %>%
    as.data.frame
head(newdata)
ggplot(newdata, aes(y=emmean, x=x)) +
    geom_ribbon(aes(ymin=lower.CL, ymax=upper.CL), fill='blue', alpha=0.3) +
    geom_line() +
    geom_point(data=data_gam, aes(y=y,x=x))+
    theme_bw()

newdata <- newdata %>% mutate(Peak = between(emmean, 7.91, 9.48))
ggplot(newdata, aes(y=emmean, x=x)) +
    geom_ribbon(aes(ymin=lower.CL, ymax=upper.CL), fill='blue', alpha=0.3) +
    geom_line(aes(color=Peak)) +
    geom_point(data=data_gam, aes(y=y,x=x))+
    theme_bw()

## Partials
#resid.list = list(x=data_gam$x)
newdata.partial = data_gam %>%
  mutate(Pred = predict(data_gam.gam,  type='link'),
         Res = resid(data_gam.gam),
         Resid = Pred + Res)
#newdata.partial = emmeans(data_gam.gam, ~x, at=resid.list) %>%
#    as.data.frame %>%
#    mutate(Resid = emmean + resid(data_gam.gam))

ggplot(newdata, aes(y=emmean, x=x)) +
    geom_ribbon(aes(ymin=lower.CL, ymax=upper.CL), fill='blue', alpha=0.3) +
    geom_line() +
    geom_point(data=newdata.partial, aes(y=Resid,x=x))+
    theme_bw()

# References